In [2]:
import google.datalab.bigquery as bq
In [3]:
# Query 생성
query_string = '''
#standardSQL
SELECT corpus AS title, COUNT(*) AS unique_words
FROM `publicdata.samples.shakespeare`
GROUP BY title
ORDER BY unique_words DESC
LIMIT 10
'''
In [4]:
query = bq.Query(query_string)
In [5]:
output_options = bq.QueryOutput.table(use_cache=True)
result = query.execute(output_options=output_options).result() # query 실행
result
Out[5]:
In [6]:
pandas_df = result.to_dataframe()
In [7]:
pandas_df
Out[7]:
In [8]:
sample_dataset = bq.Dataset('bigquery-public-data.samples')
In [9]:
# dataset이 존재하는지 유무
sample_dataset.exists()
Out[9]:
In [10]:
%bq datasets list --project cloud-datalab-samples
Out[10]:
In [12]:
%%bq query
#standardSQL
SELECT corpus AS title, COUNT(*) AS unique_words
FROM `publicdata.samples.shakespeare`
GROUP BY title
ORDER BY unique_words DESC
LIMIT 10
Out[12]:
In [17]:
%%bq query -n requests
SELECT timestamp, latency, endpoint
FROM `cloud-datalab-samples.httplogs.logs_20140615`
WHERE endpoint = 'Popular' OR endpoint = 'Recent'
In [18]:
df = requests.execute(output_options=bq.QueryOutput.dataframe()).result()
len(df)
Out[18]:
In [19]:
df.head()
Out[19]:
In [13]:
%%bq query --name data
WITH quantiles AS (
SELECT APPROX_QUANTILES(LOG10(latency), 50) AS timearray
FROM `cloud-datalab-samples.httplogs.logs_20140615`
WHERE latency <> 0
)
select row_number() over(order by time) as percentile, time from quantiles cross join unnest(quantiles.timearray) as time
order by percentile
In [14]:
%chart columns --data data --fields percentile,time
Out[14]: